Ggtree notes

Author

Claudia Zirión-Martínez

Published

May 29, 2025

Setup

Libraries

Code
library(tidyverse)
library(ggtree)
library(ggtreeExtra)
library(ape)
library(ggnewscale)
library(RColorBrewer)
library(svglite)

Paths

Code
metadata_path <- 
    "data/processed/metadata_ashton_desj_all_weavepop_H99.csv"
duplications_path <- 
    "results/tables/duplications.tsv"
merged_tree_path <- 
    "data/processed/tree_merged.newick"

Include color palettes from R script

Code
source("scripts/metadata_colors.R")

Example of color palette objects

Code
lineage_colors
     VNBI     VNBII       VNI      VNII 
"#1B9E77" "#D95F02" "#7570B3" "#E7298A" 

Tree

Read tree

Code
tree_original <- read.tree(merged_tree_path)

Metadata

Read and organize metadata.

To add metadata to a ggtree object (ggtree(tree) %<+% metadata), the first column of the metadata table must contain the labels in tree$tip.label.

Code
metadata <- read.csv(
    metadata_path,
    header = TRUE) %>%
    select(strain, sample, source, lineage, country_of_origin, dataset)
head(metadata)
strain sample source lineage country_of_origin dataset
20427_2#42 ERS1142739 Clinical VNI Vietnam Ashton
04CN-64-065 ERS2540972 Clinical VNI Uganda Ashton
BMD915 ERS2541156 Clinical VNI Vietnam Ashton
14892_1#7 ERS542302 Clinical VNI Vietnam Ashton
BMD942 ERS2541105 Clinical VNI Vietnam Ashton
BMD2801 ERS2541198 Clinical VNI Vietnam Ashton

Manipulate tree

Remove tips that are not in metadata$strain

Code
tree <- drop.tip(tree_original, setdiff(tree_original$tip.label, metadata$strain))

Color tips

Color the points of the tips with geom_tippoint() This works even when not all tips are in the metadata table, NA values are used when needed.

Code
ggtree(tree_original, layout = "circular") %<+%  metadata +
    geom_tippoint(aes(color = lineage), size = 2)+
    scale_color_manual(name="Lineage", values= lineage_colors)

Color labels

Add the tip labels (strain) and color them by a column in the metadata.

Code
ggtree(tree, layout = "circular") %<+%  metadata +
    geom_tiplab(aes(color = source), size = 2, align = TRUE)+
    scale_color_manual(name="Source", values= source_colors)

Identify basal nodes of clades

To identify the most basal node of each lineage, get the Most Recent Common Ancestor of a pair of strains in the lineage, they have to be two strains whose MRCA is at the base of the clade. You can also use this to identify any clade of interest.

Code
VNI_node <- getMRCA(tree, c("Tu241-1","UI_31647-2"))
VNII_node <- getMRCA(tree, c("C2","C12"))
VNBI_node <- getMRCA(tree, c("Tu229-1","Ftc267-2"))
VNBII_node <- getMRCA(tree, c("MW-RSA3321","MW-RSA3179"))

nodes_lineages <- data.frame(
    lineage = c("VNI", "VNII", "VNBI", "VNBII"),
    mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node)
)

Get the node number of the MRCA of each VNI sublineage

Code
VNIa4_node <- getMRCA(tree, c("04CN-30-008","UI_31647-2"))
VNIa5_node <- getMRCA(tree, c("BMD852","04CN-64-014"))
VNIa93_node <- getMRCA(tree, c("04CN-65-080","04CN-65-002"))
VNIa32_node <- getMRCA(tree, c("BMD942","BMD2801"))
VNIaX_node <- getMRCA(tree, c("Bt48","04CN-63-007"))
VNIaY_node <- getMRCA(tree, c("04CN-65-073","Bt138"))
VNIb_node <- getMRCA(tree, c("04CN-65-096","MW-RSA722"))
VNIc_node <- getMRCA(tree, c("Bt20","Bt11"))

nodes_sublineages <- data.frame(
    sublineage = c(
                "VNIa-4", "VNIa-5", "VNIa-93",
                "VNIa-32", "VNIa-X", "VNIa-Y",
                "VNIb", "VNIc"),
    mrca = c(
            VNIa4_node, VNIa5_node, VNIa93_node,
            VNIa32_node, VNIaX_node, VNIaY_node,
            VNIb_node, VNIc_node))

To print the tree with all the node labels:

Code
ggtree(tree, layout = "circular")+
    geom_text(aes(label = node), size = 3, vjust = -0.5)

Add clade labels

Code
ggtree(tree, layout = "circular") +
    geom_cladelab(data = nodes_lineages, 
                mapping = aes(node = mrca, label = lineage, color = lineage))+
    scale_color_manual(name="Lineage", values= lineage_colors)

Add node labels

Code
ggtree(tree, layout = "circular") +
        geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]), 
                        size = 4, , fontface = "bold",
                        hjust = 1.25, vjust = -0.5)

Color branches

To color the branches of clades, create groups in the tree object with groupClade().

Code
tree <- groupClade(tree, setNames(nodes_sublineages$mrca, nodes_sublineages$sublineage))

And color the tree by aes(color = group). Use scale_color_manual() with a palette to specify the colors of the clades and avoid coloring the branches that are not part of the selected clades.

Code
ggtree(tree, aes(color = group), layout = "circular", 
    size = 1) +
  scale_color_manual(
    name = "Sublineage",
    values = sublineage_colors
  )

Color branches and tips and use node labels

Code
ggtree(tree, aes(color = group), 
        layout = "circular", size = 1) %<+%  metadata +
    scale_color_manual(
        name = "Sublineage",
        values = sublineage_colors)+
    new_scale_color()+
    geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]), 
                        size = 4, , fontface = "bold",
                        hjust = 1.25, vjust = -0.5)+
    geom_tippoint(aes(color = source), size = 1)+
    scale_color_manual(name = "Source", values = source_colors)

Code
ggtree(tree, layout = "circular") %<+%  metadata +
    geom_cladelab(data = nodes_sublineages, 
                mapping = aes(node = mrca, label = sublineage, color = sublineage),
                align = TRUE, face = "bold")+
    scale_color_manual(name="Sublineage", values= sublineage_colors)+
    geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]), 
                        size = 6, , fontface = "bold",
                        hjust = 1.25, vjust = -0.5)+
    new_scale_color() +
    geom_tippoint(aes(color = source), size = 2)+
    scale_color_manual(name = "Source", values = source_colors)

Code
ggtree(tree, layout = "circular", size = 1, branch.length = "none") %<+%  metadata +
    geom_hilight(data=nodes_sublineages, aes(node=mrca, fill=sublineage), alpha = 0.5)+
        scale_fill_brewer(name = "Lineage", palette = "Dark2")+
        new_scale_fill()+
    geom_cladelab(data = nodes_sublineages, 
        mapping = aes(node = mrca, label = sublineage),
        align = TRUE, face = "bold",
        angle = "auto")

Add data as heatmap

Create a dataframe where the rownames are the tree’s tip labels, each column is a column of the heatmap and the values are the colors in the heatmap. If using categorical values for the heatmap and their order in the legend is important, convert the column to factor and set the order of the levels.

Code
country <- metadata %>%
    select(strain, country_of_origin)%>%
    column_to_rownames("strain")

country$country_of_origin <- factor(country$country_of_origin,
                                levels = names(country_colors))

To add a heamap, the function gheatmap() takes an object made with ggtree() and add the dataframe with the matrix to use in the heatmap. Use the offset option to separate the heatmap from the tree. Use the guides() function to order the legends. Use the option limits in scale_fill_manual to set the order of the legend labels to match the order of the levels in the factor.

Code
p <- ggtree(tree, branch.length = "none")  %<+%  metadata
gheatmap(p, country, width=0.1, colnames = FALSE,  
            colnames_position = "top", 
            offset=7,
            font.size = 6) +
    geom_tiplab(aes(color = lineage), size = 3)+
    scale_color_manual(name = "Lineage", values = lineage_colors)+
    scale_fill_manual(values=country_colors, 
                      name="Country", na.translate=FALSE,
                      limits = levels(country$country_of_origin))+
    guides(color = guide_legend(order = 1), fill = guide_legend(order = 2))+
    theme(legend.position = "right")